{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "92f5ec54",
   "metadata": {},
   "source": [
    "## String functions on Columns"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "877b8bc5",
   "metadata": {},
   "source": [
    "### EDA   for columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "06e8f3f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "373789d6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>First Name</th>\n",
       "      <th>Age</th>\n",
       "      <th>Gender</th>\n",
       "      <th>City</th>\n",
       "      <th>Place of Work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Sahil</td>\n",
       "      <td>10</td>\n",
       "      <td>M</td>\n",
       "      <td>J</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sonia</td>\n",
       "      <td>20</td>\n",
       "      <td>F</td>\n",
       "      <td>K</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sourav</td>\n",
       "      <td>30</td>\n",
       "      <td>M</td>\n",
       "      <td>L</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Vishal</td>\n",
       "      <td>40</td>\n",
       "      <td>M</td>\n",
       "      <td>P</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  First Name  Age Gender City  Place of Work\n",
       "0      Sahil   10      M    J           True\n",
       "1      Sonia   20      F    K          False\n",
       "2     Sourav   30      M    L          False\n",
       "3     Vishal   40      M    P           True"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=pd.DataFrame({\n",
    "'First Name':['Sahil','Sonia','Sourav','Vishal'],\n",
    "'Age':[10,20,30,40],\n",
    "'Gender':['M','F','M','M'],\n",
    "'City':['J','K','L','P'],\n",
    "'Place of Work':[True,False,False,True],\n",
    "}\n",
    ")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "38e34482",
   "metadata": {},
   "source": [
    "#### Get columns as list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e4d76140",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['First Name', 'Age', 'Gender', 'City', 'Place of Work']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.tolist()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "102fad7d",
   "metadata": {},
   "source": [
    "#### Convert column names to series | df:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ffe89310",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "First Name          First Name\n",
       "Age                        Age\n",
       "Gender                  Gender\n",
       "City                      City\n",
       "Place of Work    Place of Work\n",
       "dtype: object"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.to_series()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "f8870e68",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>First Name</th>\n",
       "      <td>First Name</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Age</th>\n",
       "      <td>Age</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Gender</th>\n",
       "      <td>Gender</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>City</th>\n",
       "      <td>City</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Place of Work</th>\n",
       "      <td>Place of Work</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           0\n",
       "First Name        First Name\n",
       "Age                      Age\n",
       "Gender                Gender\n",
       "City                    City\n",
       "Place of Work  Place of Work"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.to_frame()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d794ed40",
   "metadata": {},
   "source": [
    "#### Check if specific column is there or not"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "939a91ff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ True, False, False, False, False])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.str.contains('Name') "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d7d7886",
   "metadata": {},
   "source": [
    "#### Check if any duplicate column is there"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "1d5b6490",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([False, False, False, False, False])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.duplicated()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55079b22",
   "metadata": {},
   "source": [
    "#### Check methods/attributes of String"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "12107440",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['__annotations__', '__class__', '__delattr__', '__dict__', '__dir__']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dir(df.columns.str)[0:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e185987b",
   "metadata": {},
   "source": [
    "#### Make column names to lower case"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "9b96b260",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['first name', 'age', 'gender', 'city', 'place of work'], dtype='object')"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.str.lower()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f34c789f",
   "metadata": {},
   "source": [
    "#### Make column names to Upper case"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "e74ca52a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['FIRST NAME', 'AGE', 'GENDER', 'CITY', 'PLACE OF WORK'], dtype='object')"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.str.upper()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f41bfa14",
   "metadata": {},
   "source": [
    "#### Make column names to Title case"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "b1c5a382",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['First Name', 'Age', 'Gender', 'City', 'Place Of Work'], dtype='object')"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.str.title()  # Camel Case"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bce613bd",
   "metadata": {},
   "source": [
    "#### Make column names to Capitalize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "42cce642",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['First name', 'Age', 'Gender', 'City', 'Place of work'], dtype='object')"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.str.capitalize() # Only first letter big"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3367e158",
   "metadata": {},
   "source": [
    "#### Replace empty spaces with underscores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "ce8a4321",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['First-Name', 'Age', 'Gender', 'City', 'Place-of-Work'], dtype='object')"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.str.replace(' ','-')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3ce3d474",
   "metadata": {},
   "source": [
    "#### Rename columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "f1c0b479",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.rename(columns={'oldname':'newname'},inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17232ae6",
   "metadata": {},
   "source": [
    "#### Check total number of columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "fb541c61",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df.columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4448982c",
   "metadata": {},
   "source": [
    "#### Select particular columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "8c25c05f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['First Name', 'Age', 'Gender', 'City'], dtype=object)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.values[0:4]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a271a055",
   "metadata": {},
   "source": [
    "#### Get 2nd column and rename it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "80ddb35d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns.values[2]='DOB'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "16c3bccc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>First Name</th>\n",
       "      <th>Age</th>\n",
       "      <th>DOB</th>\n",
       "      <th>City</th>\n",
       "      <th>Place of Work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Sahil</td>\n",
       "      <td>10</td>\n",
       "      <td>M</td>\n",
       "      <td>J</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sonia</td>\n",
       "      <td>20</td>\n",
       "      <td>F</td>\n",
       "      <td>K</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sourav</td>\n",
       "      <td>30</td>\n",
       "      <td>M</td>\n",
       "      <td>L</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Vishal</td>\n",
       "      <td>40</td>\n",
       "      <td>M</td>\n",
       "      <td>P</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  First Name  Age DOB City  Place of Work\n",
       "0      Sahil   10   M    J           True\n",
       "1      Sonia   20   F    K          False\n",
       "2     Sourav   30   M    L          False\n",
       "3     Vishal   40   M    P           True"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9eea57a",
   "metadata": {},
   "source": [
    "#### Select all columns except one"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "0cc2184f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['First Name', 'Age', 'City', 'Place of Work'], dtype='object')"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns[df.columns!= 'DOB']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "04f9d131",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>First Name</th>\n",
       "      <th>Age</th>\n",
       "      <th>DOB</th>\n",
       "      <th>City</th>\n",
       "      <th>Place of Work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Sahil</td>\n",
       "      <td>10</td>\n",
       "      <td>M</td>\n",
       "      <td>J</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sonia</td>\n",
       "      <td>20</td>\n",
       "      <td>F</td>\n",
       "      <td>K</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sourav</td>\n",
       "      <td>30</td>\n",
       "      <td>M</td>\n",
       "      <td>L</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Vishal</td>\n",
       "      <td>40</td>\n",
       "      <td>M</td>\n",
       "      <td>P</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  First Name  Age DOB City  Place of Work\n",
       "0      Sahil   10   M    J           True\n",
       "1      Sonia   20   F    K          False\n",
       "2     Sourav   30   M    L          False\n",
       "3     Vishal   40   M    P           True"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d41853db",
   "metadata": {},
   "source": [
    "#### Select all columns except multiple"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "06ca1e33",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'numpy.ndarray' object has no attribute 'columns'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-43-3cca24522c75>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'DOB'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'City'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'columns'"
     ]
    }
   ],
   "source": [
    "#?\n",
    "df.loc[:,-df.columns.isin(['DOB','City']).columns]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1d592bf9",
   "metadata": {},
   "source": [
    "#### Select column names that begins with particular word"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "a71af597",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ True, False, False, False, False])"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.str.startswith('First')\n",
    "# Gives array of booleans"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca67d9aa",
   "metadata": {},
   "source": [
    "#### Select group of column names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "9d6ffb19",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['First Name', 'Age', 'DOB'], dtype=object)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.values[[0,1,2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "71ea54bb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['First Name', 'Age', 'DOB'], dtype='object')"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns[0:3]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}